load collision data
data_2018 =
read_csv("./data/2018data.csv") %>%
janitor::clean_names()
## Parsed with column specification:
## cols(
## .default = col_character(),
## TIME = col_time(format = ""),
## `ZIP CODE` = col_double(),
## LATITUDE = col_double(),
## LONGITUDE = col_double(),
## `NUMBER OF PERSONS INJURED` = col_double(),
## `NUMBER OF PERSONS KILLED` = col_double(),
## `NUMBER OF PEDESTRIANS INJURED` = col_double(),
## `NUMBER OF PEDESTRIANS KILLED` = col_double(),
## `NUMBER OF CYCLIST INJURED` = col_double(),
## `NUMBER OF CYCLIST KILLED` = col_double(),
## `NUMBER OF MOTORIST INJURED` = col_double(),
## `NUMBER OF MOTORIST KILLED` = col_double(),
## COLLISION_ID = col_double()
## )
## See spec(...) for full column specifications.
newnames = colnames(data_2018) %>%
str_replace("number_of_","")
names(data_2018) = newnames
tidy_data =
data_2018 %>%
mutate(
date_complete = date
) %>%
separate(date, into = c("month", "day", "year"), sep = "/") %>%
separate(time, into = c("hour", "minute"), sep = ":") %>%
mutate(
date_paste = as.Date(paste("2018",month,day,sep = "-"))
) %>%
select(-zip_code, -location, -on_street_name, -cross_street_name, -off_street_name,-collision_id,-year) %>%
rename("vehicle_type" = "vehicle_type_code_1",
"cause" = "contributing_factor_vehicle_1") %>%
mutate( day = as.numeric(day),
month = as.numeric(month),
hour = as.numeric(hour),
minute = as.numeric(minute),
latitude = replace_na(latitude,0),
vehicle_type = str_to_lower(vehicle_type)
) %>%
filter( latitude != 0) %>%
drop_na(borough)
Vehicle type
vehicle_type_data =
tidy_data %>%
mutate(
vehicle_type = replace(vehicle_type,str_detect(vehicle_type,"truck"),"truck"),
vehicle_type = replace(vehicle_type,str_detect(vehicle_type,"sport utility"),"sport utility vehicle"),
vehicle_type = replace(vehicle_type, vehicle_type %in% c("taxi","passenger vehicle","sedan","truck","sport utility vehicle") == FALSE, "others")) %>%
group_by(vehicle_type,hour) %>%
summarize(
n = n()
)
vehicle_type_plot =
vehicle_type_data %>%
ggplot(aes(x = hour, y = n, color = vehicle_type)) +
geom_line() +
labs(
color = "vehicle type",
title = "Collisions of Day for Different Vehicles",
x = list(title = "Hour of Day"),
y = list(title = "Collisions")
)
ggplotly(vehicle_type_plot)
Top 10 Collision Reasons
# reason_data =
# tidy_data %>%
# group_by(contributing_factor_vehicle_1) %>%
# summarize(n = n()) %>%
# arrange(desc(n)) %>%
# head(10)
# reason_data %>%
# plot_ly(x = ~reorder(contributing_factor_vehicle_1,desc(n)), y = ~n, color = ~contributing_factor_vehicle_1 ,type = "bar") %>%
# layout(
# title = "The Number of Items Ordered in Each Aisle",
# xaxis = list(title = "Different Reasons"),
# yaxis = list(title = "Count")
# )
df_cause =
tidy_data %>%
select(borough,cause) %>%
filter(!(cause %in% c('',
'Unspecified',
'Driver Inattention/Distraction',
'Other Vehicular',
'Failure to Yield Right-of-Way'))
)
df_cause_top =
df_cause %>%
group_by(cause) %>%
summarise(count = n()) %>%
top_n(10, count) %>%
ungroup() %>%
arrange(count)
cause_by_boro =
df_cause %>%
semi_join(., df_cause_top, by = "cause") %>%
group_by(borough, cause) %>%
summarise(count = n()) %>%
# reorder cause levels for desirable order in bar chart display:
mutate(
cause = factor(cause, levels = pull(df_cause_top,cause))
)
cause_by_boro_all =
cause_by_boro %>%
group_by(borough) %>%
summarise(total = sum(count))
cause_by_boro =
left_join(cause_by_boro, cause_by_boro_all, by = "borough") %>%
mutate(ratio = count / total) %>%
select(-count, -total)
cause_by_boro_plot =
cause_by_boro %>%
ggplot(aes(x = cause, y = ratio, fill = borough)) +
geom_bar(width = 0.5, stat = 'identity', show.legend = F) +
coord_flip() +
facet_grid(.~borough) +
labs(title = 'Collisions by Causes',
x = NULL,
y = NULL) +
theme(axis.text.x = element_text(angle = 45))
ggplotly(cause_by_boro_plot)
Mapping
data_2018 = tidy_data
map_data = rename(data_2018, long = latitude, lat = longitude)
pal <- colorNumeric(
palette = "Accent",
domain = map_data$persons_injured)
map_data %>%
filter(persons_injured > 0) %>%
filter(!(lat < "-70" | lat >= "-75")) %>%
mutate(
label = str_c("<b>vehicle type: ", vehicle_type, "</b><br>Persons Injured: ", persons_injured , sep = "") ) %>%
leaflet() %>%
addTiles() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addLegend("bottomright", pal = pal, values = ~persons_injured,
title = "Persons Injured",
opacity = 1
) %>%
addCircleMarkers(
~lat, ~long,
color = ~pal(persons_injured),
radius = 0.5,
popup = ~ label)
data_2018_seperate = tidy_data
Association between Day and People Injured or killed or Accident Number
data_day %>%
filter(type == "accident number") %>%
arrange(desc(number))
## # A tibble: 365 x 3
## # Groups: date [365]
## date type number
## <date> <chr> <dbl>
## 1 2018-11-15 accident number 695
## 2 2018-12-14 accident number 538
## 3 2018-11-09 accident number 532
## 4 2018-03-02 accident number 531
## 5 2018-03-22 accident number 521
## 6 2018-06-29 accident number 515
## 7 2018-09-06 accident number 515
## 8 2018-05-11 accident number 509
## 9 2018-06-14 accident number 509
## 10 2018-09-05 accident number 509
## # … with 355 more rows
plot_day =
data_day %>%
ggplot(aes(x = date,y = number, color = type))+
geom_line()+
theme(
axis.title = element_text(size=14,face="bold"),
plot.title = element_text(hjust = 0.5,size=14,face="bold"))+
labs(
title = "Trend Over the Year",
x = "Day of the Year",
y = "Number") +
annotate("text", x = as.Date("2018-11-15") , y = 695, label = "2018-11-15")
plot_kill_injured_day = ggplotly(plot_day)
plot_kill_injured_day
proportion of accident and injured people by borough, by hour
boro_events =
tidy_data %>%
drop_na(borough, persons_injured, persons_killed) %>%
group_by(borough) %>%
summarize(accident = n(),
injured = sum(persons_injured),
killed = sum(persons_killed)) %>%
mutate(prop_accident = round(accident/sum(accident),4),
prop_injured = round(injured/sum(injured),4),
prop_killed = round(killed/sum(killed),4)) %>%
pivot_longer(
prop_accident:prop_killed,
values_to = "proportion",
names_prefix = "prop_",
names_to = "events",
)
boro_events_plot =
boro_events %>%
ggplot(aes(x = borough, y = proportion, fill = events)) +
geom_bar(stat = "identity",position = "dodge") +
labs(title ="proportion of accident and injured people by borough",
x = 'Borough',
y = 'Proportion'
)
ggplotly(boro_events_plot)
Number of accident and injured people in Brooklyn is greatest comparing with other boroughs. Queen is second. Staten Island have fewest accident and injured people. Manhattan and Bronx is in the middle.
hour_events =
tidy_data %>%
drop_na(hour, persons_injured, persons_killed) %>%
group_by(hour) %>%
summarize(accident = n(),
injured = sum(persons_injured),
killed = sum(persons_killed)) %>%
mutate(prop_accident = round(accident/sum(accident),4),
prop_injured = round(injured/sum(injured),4),
prop_killed = round(killed/sum(killed),4)) %>%
pivot_longer(
prop_accident:prop_killed,
values_to = "proportion",
names_prefix = "prop_",
names_to = "events",
)
hour_events_plot =
hour_events %>%
ggplot(aes(x = hour, y = proportion, color = events)) +
geom_line() +
annotate("rect", xmin = 8, xmax = 20, ymin = 0, ymax = Inf, fill = "blue", alpha = .1) +
labs(
title = "proportion of accident and injured people by hour",
x = 'Hour',
y = 'Proportion')
hour_events_plot = ggplotly(hour_events_plot)
hour_events_plot[['x']][['layout']][['shapes']] <- c()
hour_events_plot = layout(hour_events_plot, shapes = list(type = "rect", fillcolor = "pink", line = list(color = "pink"), opacity = 0.3, x0 = 8, x1 = 20, xref = "x", y0 = 0, y1 = 0.08, yref = "y"))
hour_events_plot
Frequency of accident and injured people in the period of 8 am-20 pm is higher than other period of time. Started from 3 am, number of accident and injured people increase and reach a small peak at 8am. At 5pm, it reaches a big peak. then it started to decrease.
Holiday Analysis
data_2018 = tidy_data
holiday_data =
data_2018 %>%
select(date_complete, everything()) %>%
mutate(holiday = "No Holiday") %>%
select(date_complete, holiday, everything()) %>%
mutate(holiday = replace(holiday, date_complete == "01/01/2018", "New Year's Day"),
holiday = replace(holiday, date_complete == "01/15/2018", "Martin Luther King, Jr. Day"),
holiday = replace(holiday, date_complete == "01/13/2018"|date_complete =="01/14/2018", "Martin Luther King Day Weekend (2 days)"),
holiday = replace(holiday, date_complete == "02/19/2018", "Presidents Day"),
holiday = replace(holiday, date_complete == "02/17/2018"|date_complete =="02/18/2018", "Presidents Day Weekend (2 days)"),
holiday = replace(holiday, date_complete == "05/28/2018", "Memorial Day"),
holiday = replace(holiday, date_complete == "05/26/2018"|date_complete =="05/27/2018", "Memorial Day Weekend (2 days)"),
holiday = replace(holiday, date_complete == "07/04/2018", "Independence Day"),
holiday = replace(holiday, date_complete == "07/07/2018"|date_complete =="07/08/2018", "Independence Day Weekend (2 days)"),
holiday = replace(holiday, date_complete == "09/03/2018", "Labor Day"),
holiday = replace(holiday, date_complete == "09/01/2018"|date_complete =="09/02/2018", "Labor Day Weekend (2 days)"),
holiday = replace(holiday, date_complete == "11/22/2018", "Thanksgiving Day"),
holiday = replace(holiday, date_complete == "11/23/2018"|date_complete =="11/24/2018"|date_complete =="11/25/2018", "Thanksgiving Weekend (3 days)"),
holiday = replace(holiday, date_complete == "12/25/2018", "Christmas Day"),
holiday = replace(holiday, date_complete == "12/26/2018"|date_complete =="12/27/2018", "Christmas Day Weekend (2 days)")
)
holiday_data =
holiday_data %>%
drop_na(persons_injured)
holiday_data %>%
group_by(holiday) %>%
summarise(total_injured = sum(persons_injured))
## # A tibble: 16 x 2
## holiday total_injured
## <chr> <dbl>
## 1 Christmas Day 70
## 2 Christmas Day Weekend (2 days) 200
## 3 Independence Day 72
## 4 Independence Day Weekend (2 days) 184
## 5 Labor Day 111
## 6 Labor Day Weekend (2 days) 199
## 7 Martin Luther King Day Weekend (2 days) 181
## 8 Martin Luther King, Jr. Day 106
## 9 Memorial Day 93
## 10 Memorial Day Weekend (2 days) 207
## 11 New Year's Day 68
## 12 No Holiday 34205
## 13 Presidents Day 78
## 14 Presidents Day Weekend (2 days) 155
## 15 Thanksgiving Day 52
## 16 Thanksgiving Weekend (3 days) 252
bronx =
holiday_data %>%
filter(borough == "BRONX") %>%
group_by(holiday) %>%
summarise(bronx_total_injured = sum(persons_injured))
brooklyn =
holiday_data %>%
filter(borough == "BROOKLYN") %>%
group_by(holiday) %>%
summarise(brooklyn_total_injured = sum(persons_injured)) %>%
select(brooklyn_total_injured)
manhattan =
holiday_data %>%
filter(borough == "MANHATTAN") %>%
group_by(holiday) %>%
summarise(manhattan_total_injured = sum(persons_injured))%>%
select(manhattan_total_injured)
queens =
holiday_data %>%
filter(borough == "QUEENS") %>%
group_by(holiday) %>%
summarise(queens_total_injured = sum(persons_injured))%>%
select(queens_total_injured)
staten_island =
holiday_data %>%
filter(borough == "STATEN ISLAND") %>%
group_by(holiday) %>%
summarise(staten_island_total_injured = sum(persons_injured))%>%
select(staten_island_total_injured)
holiday_borough =
cbind(bronx, brooklyn, manhattan, queens, staten_island) %>%
mutate(total = bronx_total_injured+brooklyn_total_injured+manhattan_total_injured+ queens_total_injured+staten_island_total_injured) %>%
filter(holiday!="No Holiday")
knitr::kable(holiday_borough)
| Christmas Day |
10 |
25 |
5 |
24 |
6 |
70 |
| Christmas Day Weekend (2 days) |
34 |
99 |
23 |
41 |
3 |
200 |
| Independence Day |
18 |
32 |
3 |
18 |
1 |
72 |
| Independence Day Weekend (2 days) |
26 |
60 |
38 |
54 |
6 |
184 |
| Labor Day |
3 |
60 |
13 |
32 |
3 |
111 |
| Labor Day Weekend (2 days) |
31 |
89 |
10 |
62 |
7 |
199 |
| Martin Luther King Day Weekend (2 days) |
23 |
58 |
40 |
53 |
7 |
181 |
| Martin Luther King, Jr. Day |
27 |
42 |
14 |
19 |
4 |
106 |
| Memorial Day |
16 |
38 |
12 |
24 |
3 |
93 |
| Memorial Day Weekend (2 days) |
36 |
70 |
27 |
67 |
7 |
207 |
| New Year’s Day |
10 |
34 |
9 |
14 |
1 |
68 |
| Presidents Day |
5 |
32 |
8 |
32 |
1 |
78 |
| Presidents Day Weekend (2 days) |
30 |
42 |
28 |
51 |
4 |
155 |
| Thanksgiving Day |
13 |
17 |
5 |
17 |
0 |
52 |
| Thanksgiving Weekend (3 days) |
42 |
74 |
39 |
93 |
4 |
252 |
holiday_borough_day =
holiday_borough %>%
filter(holiday =="Christmas Day"|holiday =="Independence Day"|holiday =="Labor Day"|holiday =="Martin Luther King, Jr. Day"|holiday =="Memorial Day"|holiday =="New Year’s Day"|holiday =="Thanksgiving Day"|holiday =="Presidents Day")
knitr::kable(holiday_borough_day)
| Christmas Day |
10 |
25 |
5 |
24 |
6 |
70 |
| Independence Day |
18 |
32 |
3 |
18 |
1 |
72 |
| Labor Day |
3 |
60 |
13 |
32 |
3 |
111 |
| Martin Luther King, Jr. Day |
27 |
42 |
14 |
19 |
4 |
106 |
| Memorial Day |
16 |
38 |
12 |
24 |
3 |
93 |
| Presidents Day |
5 |
32 |
8 |
32 |
1 |
78 |
| Thanksgiving Day |
13 |
17 |
5 |
17 |
0 |
52 |
holiday_borough_day = holiday_borough_day[order(holiday_borough_day$total),]
# On the day of the holiday
plot_day =
pivot_longer(
holiday_borough_day,
bronx_total_injured:staten_island_total_injured,
names_to = "borough",
values_to = "person_injured") %>%
ggplot(aes(x = reorder(holiday, -total), y = person_injured, fill = borough)) +
geom_bar(stat="identity") +
theme(legend.position = "right", axis.text.x = element_text(angle = 45)) +
labs(
title = "Persons Injured by Car Crashs \n on Federal Holidays (2018)",
x = "Holiday",
y = "# Persons Injured") +
scale_fill_discrete(name = "Borough", labels = c("Bronx", "Brooklyn", "Manhattan", "Queens", "Staten Island"))
# scale_fill_discrete(name = "Borough", labels = c("Bronx", "Brooklyn", "Manhattan", "Queens", "Staten Island"))
On three day weekends
data_2018 = tidy_data
holiday_data_weekend =
data_2018 %>%
select(date_complete, everything()) %>%
mutate(holiday = "No Holiday") %>%
select(date_complete, holiday, everything()) %>%
mutate(holiday = replace(holiday, date_complete == "01/15/2018"|date_complete == "01/13/2018"|date_complete =="01/14/2018", "Martin Luther King Day Weekend (3 days)"),
holiday = replace(holiday, date_complete == "02/17/2018"|date_complete == "02/19/2018"|date_complete =="02/18/2018", "Presidents Day Weekend (3 days)"),
holiday = replace(holiday, date_complete == "05/28/2018"|date_complete == "05/26/2018"|date_complete =="05/27/2018", "Memorial Day Weekend (3 days)"),
holiday = replace(holiday, date_complete == "07/04/2018"|date_complete == "07/07/2018"|date_complete =="07/08/2018", "Independence Day Weekend (3 days)"),
holiday = replace(holiday, date_complete == "09/03/2018"|date_complete == "09/01/2018"|date_complete =="09/02/2018", "Labor Day Weekend (3 days)"),
holiday = replace(holiday, date_complete == "11/22/2018"|date_complete == "11/23/2018"|date_complete =="11/24/2018"|date_complete =="11/25/2018", "Thanksgiving Weekend (4 days)"),
holiday = replace(holiday, date_complete == "12/25/2018"|date_complete == "12/26/2018"|date_complete =="12/27/2018", "Christmas Day Weekend (3 days)")
)
holiday_data_weekend =
holiday_data_weekend %>%
drop_na(persons_injured)
holiday_data_weekend %>%
group_by(holiday) %>%
summarise(total_injured = sum(persons_injured))
## # A tibble: 8 x 2
## holiday total_injured
## <chr> <dbl>
## 1 Christmas Day Weekend (3 days) 270
## 2 Independence Day Weekend (3 days) 256
## 3 Labor Day Weekend (3 days) 310
## 4 Martin Luther King Day Weekend (3 days) 287
## 5 Memorial Day Weekend (3 days) 300
## 6 No Holiday 34273
## 7 Presidents Day Weekend (3 days) 233
## 8 Thanksgiving Weekend (4 days) 304
bronx =
holiday_data_weekend %>%
filter(borough == "BRONX") %>%
group_by(holiday) %>%
summarise(bronx_total_injured = sum(persons_injured))
brooklyn =
holiday_data_weekend %>%
filter(borough == "BROOKLYN") %>%
group_by(holiday) %>%
summarise(brooklyn_total_injured = sum(persons_injured)) %>%
select(brooklyn_total_injured)
manhattan =
holiday_data_weekend %>%
filter(borough == "MANHATTAN") %>%
group_by(holiday) %>%
summarise(manhattan_total_injured = sum(persons_injured))%>%
select(manhattan_total_injured)
queens =
holiday_data_weekend %>%
filter(borough == "QUEENS") %>%
group_by(holiday) %>%
summarise(queens_total_injured = sum(persons_injured))%>%
select(queens_total_injured)
staten_island =
holiday_data_weekend %>%
filter(borough == "STATEN ISLAND") %>%
group_by(holiday) %>%
summarise(staten_island_total_injured = sum(persons_injured))%>%
select(staten_island_total_injured)
holiday_borough_weekend =
cbind(bronx, brooklyn, manhattan, queens, staten_island) %>%
mutate(total = bronx_total_injured+brooklyn_total_injured+manhattan_total_injured+ queens_total_injured+staten_island_total_injured) %>%
filter(holiday!="No Holiday")
knitr::kable(holiday_borough_weekend)
| Christmas Day Weekend (3 days) |
44 |
124 |
28 |
65 |
9 |
270 |
| Independence Day Weekend (3 days) |
44 |
92 |
41 |
72 |
7 |
256 |
| Labor Day Weekend (3 days) |
34 |
149 |
23 |
94 |
10 |
310 |
| Martin Luther King Day Weekend (3 days) |
50 |
100 |
54 |
72 |
11 |
287 |
| Memorial Day Weekend (3 days) |
52 |
108 |
39 |
91 |
10 |
300 |
| Presidents Day Weekend (3 days) |
35 |
74 |
36 |
83 |
5 |
233 |
| Thanksgiving Weekend (4 days) |
55 |
91 |
44 |
110 |
4 |
304 |
plot_weekend =
pivot_longer(
holiday_borough_weekend,
bronx_total_injured:staten_island_total_injured,
names_to = "borough",
values_to = "person_injured") %>%
ggplot(aes(x = reorder(holiday, -total), y = person_injured, fill = borough)) +
geom_bar(stat="identity") +
theme(legend.position = "right", axis.text.x = element_text(angle = 45)) +
labs(
title = "Persons Injured by Car Crashs on \n Federal Holidays Weekends (2018)",
x = "Holiday Weekends",
y = "# Persons Injured") +
scale_fill_discrete(name = "Borough", labels = c("Bronx", "Brooklyn", "Manhattan", "Queens", "Staten Island"))
plot_day + plot_weekend

Weather
climate_df =
read_csv("./large_data/climate.csv") %>%
janitor::clean_names() %>%
select(date,hourly_dry_bulb_temperature,hourly_precipitation,daily_weather) %>%
separate(date, into = c("date", "time"), sep = " ") %>%
mutate(
date = as.Date(date)
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## DATE = col_datetime(format = ""),
## REPORT_TYPE = col_character(),
## BackupDirection = col_logical(),
## BackupDistance = col_logical(),
## BackupDistanceUnit = col_logical(),
## BackupElements = col_logical(),
## BackupElevation = col_logical(),
## BackupElevationUnit = col_logical(),
## BackupEquipment = col_logical(),
## BackupLatitude = col_logical(),
## BackupLongitude = col_logical(),
## BackupName = col_logical(),
## DailyPeakWindDirection = col_character(),
## DailyPrecipitation = col_character(),
## DailySnowDepth = col_character(),
## DailySnowfall = col_character(),
## DailySustainedWindDirection = col_character(),
## DailyWeather = col_character(),
## HeavyFog = col_logical(),
## HourlyPrecipitation = col_character()
## # ... with 29 more columns
## )
## See spec(...) for full column specifications.
all_day_weather_df =
climate_df %>% filter(time == "23:59:00") %>%
distinct(date,daily_weather) %>%
filter(!duplicated(date)) %>%
separate(daily_weather, into = c("weathertype", "other"), sep = 2) %>%
mutate(
weathertype = recode(weathertype,
"SN" = "snow",
"HZ" = "haze",
"RA" = "rain",
"FG" = "fog",
"BR" = "mist"
),
weathertype = replace_na(weathertype,"sunny")) %>%
select(-other)
collision_all =
tidy_data %>%
group_by(date_complete) %>%
summarise(
collision_event = n(),
injured_event = sum(persons_injured != 0, na.rm = TRUE),
killed_event = sum(persons_killed != 0, na.rm = TRUE)
)
collision_weather =
bind_cols(all_day_weather_df,collision_all) %>%
select(-date_complete)
Weather type
weather_type_plot =
collision_weather %>%
group_by(weathertype) %>%
summarise(
count = mean(collision_event)
) %>%
mutate(
weathertype = fct_reorder(weathertype,count)
) %>%
ggplot(aes(x = weathertype, y = count,
fill = weathertype)) +
geom_bar(stat = "identity", width = 0.6) +
coord_flip()
ggplotly(weather_type_plot)
Daily weather
daily_weather_df =
climate_df %>%
separate(time, into = c("hour", "minute","second"), sep = ":") %>%
separate(date, into = c("year","month", "day"), sep = "-") %>%
separate(hourly_precipitation, into = c("prep","other"), sep = 4) %>%
select(-minute,-second,-year,-daily_weather,-other) %>%
mutate(
month = as.numeric(month),
day = as.numeric(day),
hour = as.numeric(hour),
prep = replace_na(as.numeric(prep),0)
)
daily_weather_final = daily_weather_df[rep(seq_len(nrow(daily_weather_df)), each = 3), ]
daily_collision_data =
tidy_data %>%
select(month,day,hour,persons_injured,persons_killed) %>%
group_by(month,day,hour) %>%
summarise(
accident = n(),
injured = sum(persons_injured,na.rm = TRUE),
killed = sum(persons_killed,na.rm = TRUE)
) %>%
pivot_longer(
accident:killed,
names_to = "events",
values_to = "number"
)
daily_weather_collision =
left_join(daily_collision_data, daily_weather_df,
by = c("month","day","hour")) %>%
mutate(
prep_degree =
if_else(prep %in% 0:0.10,"light",
if_else( prep < 0.3 & prep > 0.11 , "moderate","heavy")),
prep_degree = factor(prep_degree, levels = c("heavy","moderate","light")))
daily_weather_plot =
daily_weather_collision %>%
ggplot(aes(hour, y = number, color = events)) +
geom_point(alpha = 0.5) +
facet_grid(~prep_degree) +
labs(
color = "events"
)
ggplotly(daily_weather_plot)